* ***************************************************************** *
* ***************************************************************** *
*   File-Name:      data.do                                       *
*   Date:           June 6, 2016                                   *
*   Author:         TG, Shukova                                     *
*   Purpose:      	Recoding of aggregated survey data              *
*   Input File:     aggregate.dta                                   *
*   Output File:    recode.dta                                       *
* ****************************************************************  *
* ****************************************************************  *


* --- Session setup ---------------------------------------------------------
version 14.2             // run this do-file under Stata 14.2 command semantics
set more off             // never pause the output at a --more-- prompt
capture log close        // close a log file if one is open; no error if none is
clear                    // begin with no data in memory


**********************************************
* Specify your own local path to the dropbox *
**********************************************


* Thomas G.'s local (office) path:
* local path /Users/gschwend/Dropbox/Andre/OUP Book

* Thomas G.'s local (Air) path:
* local path /Users/thomasgschwend/Dropbox/Andre/OUP Book/
 



* Load the input data set (aggregate.dta, looked up in the working directory).
use aggregate, clear

* Check data consistency 0
* The first two like/dislike items must reproduce the known number of
* non-missing answers, the 0-10 range and the known sum of scores; the
* do-file stops here if the data on disk differ.
local expN_party1   20256
local expSum_party1 109140
local expN_party2   20212
local expSum_party2 100219

foreach p in party1 party2 {
    summarize Q17_`p'
    return list
    assert r(N) == `expN_`p'' & r(min) == 0 & r(max) == 10 & r(sum) == `expSum_`p''
}


/*

* generate election-specific id
gen eid =.
replace eid = 111 if ELECID==5  //France, National, IDF (Paris)
replace eid = 112 if ELECID==6  //France, National, Provence
replace eid = 101 if ELECID==22 //France, local, IDF (Paris)
replace eid = 102 if ELECID==23 //France, local, Provence (Marseille)
replace eid = 131 if ELECID==17 //France, Europe, IDF (Paris)
replace eid = 132 if ELECID==16 //France, Europe, Provence

replace eid = 211 if ELECID==10 //Germany, National, Lower Sax
replace eid = 212 if ELECID==12  //Germany, National, Bavaria
replace eid = 201 if ELECID==11 //Germany, regional, Lower Sax
replace eid = 202 if ELECID==13 //Germany, regional, Bavaria
replace eid = 231 if ELECID==18 //Germany, Europe, Lower Sax
replace eid = 232 if ELECID==19 //Germany, Europe, Bavaria

replace eid = 311 if ELECID==7  //Spain, National, Catalonia
replace eid = 312 if ELECID==9  //Spain, National, Madrid
replace eid = 301 if ELECID==8  //Spain, regional, Catalonia
replace eid = 302 if ELECID==24  //Spain, regional, Madrid
replace eid = 331 if ELECID==20 //Spain, Europe, Catalonia
replace eid = 332 if ELECID==21 //Spain, Europe, Madrid


*label define eid 111 "France, national, IDF" 112 "France, National, Provence"
*label value eid eid
*/

**************************************************************************
*************			  PARTY PREFERENCES  				 *************
**************************************************************************


*preserve

* ---------------------------------------------------------------------------
* Rank the like/dislike scores Q17_party1 ... Q17_party9 within respondent.
* Creates, per respondent:
*   maxrow          highest score given to any party
*   maxparty1-9     distance of each party's score to that highest score
*   yparty1-9       the original Q17_* scores under a new name
*   win_pref1-9     score of the 1st, 2nd, ... most preferred party
*   name_pref1-9    name ("party1" ... "party9") of that party
*   seqn            respondent sequence number used as reshape id
* Side effect: respondents without any valid score lose all their long-format
* rows below and are therefore no longer in the data after the wide reshape.
* ---------------------------------------------------------------------------

egen maxrow = rowmax(Q17_party1 Q17_party2 Q17_party3 Q17_party4 Q17_party5 Q17_party6 Q17_party7 Q17_party8 Q17_party9)
** With preserve/restore the data would not have to be reloaded and recoded again.
local part "party1 party2 party3 party4 party5 party6 party7 party8 party9"
foreach v in `part' {
    gen max`v' = maxrow - Q17_`v'
    label var max`v' "Distance of `v' to most preferred party"
    rename Q17_`v' y`v'
    label var y`v' "Like/Dislike of party `v'"
}

* Respondent id for the reshape. Stored as long: an int cannot hold values
* above 32,740, so in a larger file later rows would get a missing id and
* the reshape would fail on a non-unique i() variable.
gen long seqn = _n

reshape long y, i(seqn) j(vname1) string

* Keep rated parties only. missing() also removes extended missing values
* (.a-.z); these sort above every number and would otherwise end up ranked
* as the most preferred party.
drop if missing(y)
gen long caseid = _n /*** needed to uniquely define order of pairs (seqn, y) ***/

sort seqn y caseid


* 9 is max number of parties
* Within respondent the rows are now in ascending score order, so row _N is
* the most preferred party, row _N-1 the second most preferred, and so on.
* Ranks beyond the number of rated parties point before the first row and
* come out missing / empty.
forvalues i = 1(1)9 {
    by seqn: gen win_pref`i' = y[_N-(`i'-1)]
    label var win_pref`i' "like/dislike Score of `i'. preferred party"
    by seqn: gen str name_pref`i'= vname1[_N-(`i'-1)]
}

drop caseid		/*** needed to uniquely define order, but now drop it ***/


* win_pref<i> holds the score and name_pref<i> the name of the i-th most
* preferred party. Back to one row per respondent:
reshape wide y, i(seqn) j(vname1) string
*drop seqn y*



* Tie indicators for the top of the preference ranking.
*   TiePartyPref12 = 1 if the two highest scores are equal
*   TiePartyPref13 = 1 if, in addition, the third highest score is equal too
*   TiePartyPref14 = 1 if, in addition, the fourth highest score is equal too
* Each higher-order indicator is set to missing when the one it builds on is
* missing.
generate TiePartyPref12 = (win_pref1 == win_pref2)

generate TiePartyPref13 = (win_pref1 == win_pref3 & TiePartyPref12 == 1)
replace  TiePartyPref13 = . if TiePartyPref12 == .

generate TiePartyPref14 = (win_pref1 == win_pref4 & TiePartyPref13 == 1)
replace  TiePartyPref14 = . if TiePartyPref13 == .



* Q8: vote intention (pre-election), copied from Q8A

generate Q8 = Q8A
recode Q8 (88/max = .)  /* codes 88 and above ("other party") become missing */
label variable Q8 "Vote intention"

* One 0/1 indicator per party; left missing where Q8 is missing (.)
forvalues p = 1/9 {
    generate vote`p' = (Q8 == `p') if Q8 != .
    label variable vote`p' "Vote intention for party `p'"
}


* Numeric party numbers for the preference ranking: strip the text "party"
* from the party name stored in name_pref<rank> and convert what is left
* to a number.

* Most and 2nd most preferred party under descriptive names
local rankdesc1 "most preferred party"
local rankdesc2 "2nd most preferred party"
forvalues r = 1/2 {
    generate prefPartyNum`r' = regexr(name_pref`r', "party", "")
    destring prefPartyNum`r', replace
    label variable prefPartyNum`r' "`rankdesc`r''"
}

* The same conversion for all nine ranks (pref1 and pref2 repeat the two
* variables created above)
forvalues r = 1/9 {
    generate pref`r' = regexr(name_pref`r', "party", "")
    destring pref`r', replace
    label variable pref`r' "Number of  `r'. preferred party"
}





* Swap the string variable COUNTRY for a value-labelled numeric variable that
* carries the same name.
encode COUNTRY, generate(country)
drop COUNTRY
rename country COUNTRY
* 1= France, 2=Germany, 3=Spain (encode numbers the names alphabetically;
* assumes these are the only three values - TODO confirm)

* Generate dummy that indicates whether a respondent voted for her preferred party

* define strategic if you vote for party that is not clearly most preferred
* votes for party that is tied on first preference are non sincere (strategic)!!!!

* All conditions require a non-missing vote intention. Without that guard
* "prefPartyNum1 != Q8" is true whenever Q8 is missing, so respondents with
* no (or a recoded "other party") vote intention were coded as non-sincere.
*
* NOTE(review): with a first-place tie (TiePartyPref12==1) nonsincere is only
* set for votes for prefPartyNum1, and nonsincere1 only for votes for
* prefPartyNum2; all other tied respondents stay missing. Please confirm this
* is the intended coding of ties.

gen nonsincere = .
replace nonsincere = 1 if prefPartyNum1 != Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere = 0 if prefPartyNum1 == Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere = 1 if prefPartyNum1 == Q8 & prefPartyNum2 != Q8 & TiePartyPref12==1 & !missing(Q8)

*tab  eid nonsincere, row

* as nonsincere but a vote for a party tied on first-preference is sincere
gen nonsincere1 = .
replace nonsincere1 = 1 if prefPartyNum1 != Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere1 = 0 if prefPartyNum1 == Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere1 = 0 if prefPartyNum1 != Q8 & prefPartyNum2 == Q8 & TiePartyPref12==1 & !missing(Q8)

*tab  eid nonsincere1, row



*tab  eid sincere, row



**************************************************
** Code the DVs: Two Forms of Strategic Voting  **
**************************************************


** construct variable that indicates whether the most preferred party (or 2nd most)
** wins a seat in the district or not!


* Create the ids used to merge in party-level information for the most and
* the 2nd most preferred party: election id * 100 + party number.
* NOTE(review): eid is not created in the active part of this do-file (the
* block that generates it is commented out), so it presumably already exists
* in aggregate.dta - confirm.
generate idMP1 = eid*100 + prefPartyNum1
generate idMP2 = eid*100 + prefPartyNum2


* Same procedure for both ranks: sort on the merge key (caseid keeps the
* current row order within key), merge the party file many-to-one, tabulate
* the merge result, and discard party rows that match no respondent.
local desc1 "most preferred party is a large party"
local desc2 "2nd most preferred party is a large party"

forvalues k = 1/2 {
    generate caseid = _n
    sort idMP`k' caseid
    merge m:1 idMP`k' using "parties_mergeidMP`k'.dta"
    label variable large_partyMP`k' "`desc`k''"
    tabulate _merge
    drop if _merge == 2
    drop _merge caseid
}

*********************************************************************************************
*** Indicator for wasted vote: Is most preferred party viable to win a seat?
* 1 = party holds no seat, 0 = otherwise (a missing seat count also gives 0)
generate seat_current0MP1  = (seat_currentMP1  == 0)
generate seat_previous0MP1 = (seat_previousMP1 == 0)

label variable seat_current0MP1  "most preferred party not viable (currently no seat)"
label variable seat_previous0MP1 "most preferred party not viable (previously no seat)"

* get district-level recodes in (same as in country specific do-files)

**** SPAIN
* based-on district-level results
* For each election the indicator is first reset to 0 for everybody and then
* switched to 1 for the listed party/district combinations.

* --- eid 301: Catalonia Regional Election ---
replace seat_current0MP1 = 0 if eid==301
replace seat_current0MP1 = 1 if eid==301 & ///
    ( (partynumber==9 & district!=1)         ///
    | inlist(partynumber, 7, 8)              ///
    | (partynumber==6 & inlist(district, 3, 4)) )

replace seat_previous0MP1 = 0 if eid==301
replace seat_previous0MP1 = 1 if eid==301 & ///
    ( inlist(partynumber, 8, 9)                  ///
    | (partynumber==7 & inlist(district, 2, 3))  ///
    | (partynumber==6 & district!=1)             ///
    | (partynumber==3 & district==3) )


* --- eid 311: Catalonia National Election ---
replace seat_current0MP1 = 0 if eid==311
replace seat_current0MP1 = 1 if eid==311 & ///
    ( inlist(partynumber, 6, 7)                  ///
    | (partynumber==5 & inlist(district, 2, 3))  ///
    | (partynumber==3 & district!=1) )

replace seat_previous0MP1 = 0 if eid==311
replace seat_previous0MP1 = 1 if eid==311 & ///
    ( inlist(partynumber, 6, 7)                  ///
    | (partynumber==5 & inlist(district, 2, 3))  ///
    | (partynumber==3 & inlist(district, 2, 4))  ///
    | (partynumber==2 & district==4) )

**************
*** France ***
**************
* Builds notwastedMP1 (1 = most preferred party is among the parties listed
* for the respondent's district, 0 = it is not) for the two French national
* elections, and then flags seat_current0MP1 where it is 0.


** eid=111: party numbers per district (party1-party3) come from partycodes.dta
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

* Which districts are not covered? Paris 14 was won in 1st round. Not in sample.
tabulate district_name if _merge==1
keep if _merge != 2
drop _merge caseid

generate notwastedMP1 = (prefPartyNum1 == party1 | prefPartyNum1 == party2 | prefPartyNum1 == party3)
* undefined without a most preferred party and outside election 111
replace notwastedMP1 = . if prefPartyNum1 == . | eid != 111

drop party1 party2 party3 departementcode departementname


*** eid=112
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

tabulate district_name if _merge==1

/* Districts won in first round
					party1		party2	party3
ALPES MARITIMES	6	SOC1		UMP2	FN3
ALPES MARITIMES	7	UMP2		FN3
*/

keep if _merge != 2
drop _merge caseid


tabulate eid

* Hard-coded for election 112: start from 0 and set 1 for the parties listed
* in the table above.
* NOTE(review): every eid==112 respondent outside these two districts keeps
* the value 0 - confirm that this is intended.
generate notwasted1MP1 = 0 if eid==112
replace notwasted1MP1 = 1 if district_name == "ALPES MARITIMES 6" & inlist(prefPartyNum1, 1, 2, 3)
replace notwasted1MP1 = 1 if district_name == "ALPES MARITIMES 7" & inlist(prefPartyNum1, 2, 3)
replace notwasted1MP1 = . if eid != 112 /* make sure this only applies to those districts */


replace notwastedMP1 = notwasted1MP1 if eid==112

tabulate notwastedMP1 eid

drop party1 party2 party3 departementcode departementname

* now fix  district level results: France national
replace seat_current0MP1 = 1 if notwastedMP1 == 0

*assume for France that previous is the same as current*
*replace seat_current0MP1 = 1 if notwastedMP1==0
*********************************************************************************************

*seat_current0MP1 seat_current0


*********************************************************************************************
*** Indicator for wasted vote: Is 2nd most preferred party viable to win a seat?
* 1 = party holds no seat, 0 = otherwise (a missing seat count also gives 0)
gen seat_current0MP2 = (seat_currentMP2==0)
gen seat_previous0MP2 = (seat_previousMP2==0)

* These variables describe the 2nd most preferred party; the labels used to
* repeat the text of the MP1 variables.
label var seat_current0MP2 "2nd most preferred party not viable (currently no seat)"
label var seat_previous0MP2 "2nd most preferred party not viable (previously no seat)"

* get district-level recodes in (same as in country specific do-files)

**** SPAIN
* based-on district-level results
* For each election the indicator is first reset to 0 for everybody and then
* switched to 1 for the listed party/district combinations.
* NOTE(review): the conditions below use partynumber, the same variable as
* the recodes for the most preferred party. Confirm that it identifies the
* 2nd most preferred party here (otherwise prefPartyNum2 may be what is meant).
replace seat_current0MP2=0 if eid==301 /*Catalonia Regional Election */
replace seat_current0MP2=1 if eid==301 & partynumber==9 & district!=1
replace seat_current0MP2=1 if eid==301 & partynumber==8 
replace seat_current0MP2=1 if eid==301 & partynumber==7
replace seat_current0MP2=1 if eid==301 & partynumber==6 & (district==4 | district==3)

replace seat_previous0MP2=0 if eid==301 /*Catalonia Regional Election */
replace seat_previous0MP2=1 if eid==301 & partynumber==9 
replace seat_previous0MP2=1 if eid==301 & partynumber==8 
replace seat_previous0MP2=1 if eid==301 & partynumber==7 & (district==2 | district==3)
replace seat_previous0MP2=1 if eid==301 & partynumber==6 & (district!=1)
replace seat_previous0MP2=1 if eid==301 & partynumber==3 & (district==3)


replace seat_current0MP2=0 if eid==311 /*Catalonia National Election */
replace seat_current0MP2=1 if eid==311 & partynumber==7
replace seat_current0MP2=1 if eid==311 & partynumber==6 
replace seat_current0MP2=1 if eid==311 & partynumber==5 & (district==2 | district==3)
replace seat_current0MP2=1 if eid==311 & partynumber==3 & district!=1

replace seat_previous0MP2=0 if eid==311 /*Catalonia National Election */
replace seat_previous0MP2=1 if eid==311 & partynumber==7 
replace seat_previous0MP2=1 if eid==311 & partynumber==6 
replace seat_previous0MP2=1 if eid==311 & partynumber==5 & (district==2 | district==3)
replace seat_previous0MP2=1 if eid==311 & partynumber==3 & (district==2 | district==4)
replace seat_previous0MP2=1 if eid==311 & partynumber==2 & (district==4)


*** France
* Builds notwastedMP2 (1 = 2nd most preferred party is among the parties
* listed for the respondent's district, 0 = it is not) for the two French
* national elections, and then flags seat_current0MP2 where it is 0.

** eid=111: party numbers per district (party1-party3) come from partycodes.dta
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

* Which districts are not covered? Paris 14 was won in 1st round. Not in sample.
tabulate district_name if _merge==1
keep if _merge != 2
drop _merge caseid

generate notwastedMP2 = (prefPartyNum2 == party1 | prefPartyNum2 == party2 | prefPartyNum2 == party3)
* undefined without a 2nd most preferred party and outside election 111
replace notwastedMP2 = . if prefPartyNum2 == . | eid != 111

drop party1 party2 party3 departementcode departementname

*** eid=112
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

tabulate district_name if _merge==1

/* Districts won in first round
					party1		party2	party3
ALPES MARITIMES	6	SOC1		UMP2	FN3
ALPES MARITIMES	7	UMP2		FN3
*/

keep if _merge != 2
drop _merge caseid

* Hard-coded for election 112: start from 0 and set 1 for the parties listed
* in the table above.
* NOTE(review): every eid==112 respondent outside these two districts keeps
* the value 0 - confirm that this is intended.
generate notwasted1MP2 = 0 if eid==112
replace notwasted1MP2 = 1 if district_name == "ALPES MARITIMES 6" & inlist(prefPartyNum2, 1, 2, 3)
replace notwasted1MP2 = 1 if district_name == "ALPES MARITIMES 7" & inlist(prefPartyNum2, 2, 3)
replace notwasted1MP2 = . if eid != 112 /* make sure this only applies to those districts */

replace notwastedMP2 = notwasted1MP2 if eid==112
tabulate eid notwastedMP2
drop party1 party2 party3 departementcode departementname


* now fix  district level results: France national
replace seat_current0MP2 = 1 if notwastedMP2 == 0

*assume for France that previous is the same as current*
*replace seat_current0MP1 = 1 if notwastedMP1==0
*********************************************************************************************
* Shrink variables to the smallest storage type that holds their values, then
* write the recoded data to recode.dta (an existing file is overwritten).
compress
save recode, replace









/* Old code







/*
A voter is coded as if she follows the wasted-vote strategy if she 
(1) does not cast her vote for the most preferred party, 
(2) her most-preferred party is not expected to be viable, i.e. is 
    not expected to win at least one seat in the voter’s electoral district and, 
(3) she votes for a party instead that is expected to gain representation
*/


exit












forvalues v = 1(1)9 {
	   gen nonsincere_`v' =.
	   replace nonsincere`_v' = 1 if prefPartyNum1 != `v' & vote`v'==1 
	   replace nonsincere_`v' = 0 if prefPartyNum1 == `v' & vote`v'==1
	   replace nonsincere_`v' = 1 if prefPartyNum1 == `v' & vote`v'==1 & prefPartyNum2 != `v' & TiePartyPref12==1
	   replace nonsincere_`v' = 0 if prefPartyNum1 != `v' & vote`v'==1 & prefPartyNum2 == `v' & TiePartyPref12==1
       label var nonsincere_`v' "Non-sincere (=1) vote for party `v'"
	   }





gen level_new = 1 if LEVEL=="National"
replace level_new = 2 if LEVEL=="Regional"
replace level_new = 3 if LEVEL=="European"


collapse (mean) sincere nonsincere*  level_new COUNTRY, by(eid)


replace nonsincere  = nonsincere*100
replace nonsincere1 = nonsincere1*100
replace sincere = sincere*100

label define level_new 2 "Regional" 1 "National" 3 "Euopean"
label value level_new level_new

gen national = (level_new==1)
gen regional = (level_new==2)



label define COUNTRY 1 "France" 2 "Germany" 3 "Spain"
label value COUNTRY COUNTRY

gen Germany = (COUNTRY==2)
gen Spain   = (COUNTRY==3)
gen France  = (COUNTRY==1)



save party, replace 

use party, clear

#delimit ;
graph box nonsincere, over(level_new)
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
		graphregion(fcolor(white)) 
		ylabel(20(10)50, labsize(small) angle(horizontal) nogrid) yscale(nofextend) 
		plotregion(lcolor(white) margin(l=4))
saving(box1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export box1.pdf, replace


*graph box nonsincere1, over(level_new)

#delimit ;
twoway scatter nonsincere  COUNTRY, mlabel(level_new)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Non-Sincere Voting")              // incl. axis titles
	ylabel(20(10)50, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(0 " " 1 "France" 2 "Germany" 3 "Spain" 4" ", notick)                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(graph1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export graph1.pdf, replace

reg nonsincere national regional 
eststo est0
reg nonsincere national regional Germany Spain
eststo est1
reg nonsincere1 national regional
eststo est2
reg nonsincere1 national regional Germany Spain
eststo est3


#delimit ;
esttab est0 est1 est2 est3 using regional_level.rtf, replace b(%9.2f) se(%9.2f)  rtf  noobs star(* 0.10 ** 0.05)
title ("Table X.1: Non-Sincere Voting across Electoral Arenas")
varlabels(_cons "Constant" nonsincere1 "Non-Sincere" national "National" regional "Regional"
          Germany "Germany" Spain "Spain" )
stats(r2) sfmt(%9.0f)  sca("r2 R-squared") nonotes
addnote ("N = `e(N)'. Standard errors in parentheses. * p < 0.10, ** p < 0.05")
order(national regional Germany Spain)
;
#delimit cr


************ Sincere Voting


#delimit ;
graph box sincere, over(level_new)
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
		graphregion(fcolor(white)) 
		ylabel(70(10)100, labsize(small) angle(horizontal) nogrid) yscale(nofextend) 
		plotregion(lcolor(white) margin(l=4))
saving(boxsincere, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export boxsincere.pdf, replace





#delimit ;
twoway scatter sincere  COUNTRY, mlabel(eid)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Sincere Voting")              // incl. axis titles
	ylabel(70(10)100, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(0 " " 1 "France" 2 "Germany" 3 "Spain" 4 " ", noticks)                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(scatter_sincere1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export scatter_sincere1.pdf, replace




#delimit ;
twoway scatter sincere  COUNTRY, mlabel(level_new)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Sincere Voting")              // incl. axis titles
	ylabel(40(10)80, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(2 "France" 4 "Germany" 3 "Spain" 5 " ")                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(scatter_sincere, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export scatter_sincere.pdf, replace


reg sincere national regional 
eststo est0
reg sincere national regional Germany Spain
eststo est1



#delimit ;
esttab est0 est1  using regional_levelsincere.rtf, replace b(%9.2f) se(%9.2f)  rtf  noobs star(* 0.10 ** 0.05)
title ("Table X.2: Sincere Voting across Electoral Arenas")
varlabels(_cons "Constant" sincere "Sincere Voting" national "National" regional "Regional"
          Germany "Germany" Spain "Spain" )
stats(r2) sfmt(%9.0f)  sca("r2 R-squared") nonotes
addnote ("N = `e(N)'. Standard errors in parentheses. * p < 0.10, ** p < 0.05")
order(national regional Germany Spain)
;
#delimit cr


save eid, replace

restore


*/


